knitr::opts_chunk$set(echo = TRUE)
libinstall <- function(pkg) {
if(!require(pkg, character.only = T))install.packages(pkg)
library(pkg, character.only = T)
}
libinstall("tidyverse")
## Loading required package: tidyverse
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.5 v dplyr 1.0.3
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
libinstall("glue")
## Loading required package: glue
##
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
##
## collapse
libinstall("readr")
libinstall("plotly")
## Loading required package: plotly
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
libinstall("readr")
libinstall("readxl")
## Loading required package: readxl
libinstall("lubridate")
## Loading required package: lubridate
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
libinstall("curl")
## Loading required package: curl
##
## Attaching package: 'curl'
## The following object is masked from 'package:readr':
##
## parse_date
# Download cpsaat data
curl_download("https://www.bls.gov/cps/cpsaat11.xlsx", destfile = "cpsaat11.xlsx")
# Import cpsaat
cpsaat11 <- read_excel(
"cpsaat11.xlsx",
col_names = c(
"Occupation",
"Total",
"Women",
"White",
"Black/African American",
"Asian",
"Hispanic/Latino"
),
na = "–",
col_types = c(
Occupation="text",
Total="numeric",
"Women"="numeric",
"White"="numeric",
"Black/African American"="numeric",
"Asian"="numeric",
"Hispanic/Latino"="numeric"
),
skip = 7
)%>%
drop_na(Occupation)
Labor_force_participation <- read_csv("EPI Data Library - Labor-force participation rate.csv", col_types = c(.default="n", Date="c"))
Medianaverage_hourly_wages <- read_csv("EPI Data Library - Medianaverage hourly wages.csv", col_types = c(.default="n"))
Minimum_wage <- read_csv("EPI Data Library - Minimum wage.csv", col_types = c(.default="n"))
cpsaat11
## # A tibble: 596 x 7
## Occupation Total Women White `Black/African A~ Asian `Hispanic/Latin~
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Total, 16 years ~ 147795 46.8 78 12.1 6.4 17.6
## 2 Management, prof~ 63644 51.7 78.7 9.7 8.6 10.4
## 3 Management, busi~ 27143 44.6 81.7 8.8 6.7 10.9
## 4 Management occup~ 18564 40.4 83.4 8 5.8 10.7
## 5 Chief executives 1669 29.3 88 4.3 5.4 7.4
## 6 General and oper~ 1057 30.5 84.4 7.1 4.5 12.4
## 7 Legislators 25 NA NA NA NA NA
## 8 Advertising and ~ 56 52.1 80.5 14.7 3.9 3.5
## 9 Marketing manage~ 554 60.7 84.1 5.5 7.6 9.9
## 10 Sales managers 521 30.9 87.6 5.8 4.2 7.6
## # ... with 586 more rows
Looks fine.
Labor_force_participation
## # A tibble: 513 x 13
## Date All Women Men Black `Black Women` `Black Men` Hispanic
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Sep-~ 62.2 56.6 68.1 61.2 59.6 63.2 66
## 2 Aug-~ 62.3 56.8 68.2 61.5 59.8 63.5 66.2
## 3 Jul-~ 62.4 56.9 68.3 61.6 59.9 63.7 66.3
## 4 Jun-~ 62.6 57 68.5 61.8 60.1 63.9 66.5
## 5 May-~ 62.7 57.1 68.7 62 60.3 64 66.6
## 6 Apr-~ 62.9 57.3 68.9 62.3 60.5 64.3 66.7
## 7 Mar-~ 63.1 57.5 69.1 62.6 60.8 64.7 67
## 8 Feb-~ 63.1 57.5 69.2 62.6 60.9 64.7 67
## 9 Jan-~ 63.1 57.5 69.2 62.6 60.9 64.8 66.8
## 10 Dec-~ 63.1 57.4 69.2 62.7 60.8 64.8 66.8
## # ... with 503 more rows, and 5 more variables: `Hispanic Women` <dbl>,
## # `Hispanic Men` <dbl>, White <dbl>, `White Women` <dbl>, `White Men` <dbl>
Labor_force_participation=Labor_force_participation%>%
mutate(Date=lubridate::my(Date))
Participation=Labor_force_participation%>%
pivot_longer(-c(Date), names_to = "Race", values_to = "Participation", values_drop_na = T)%>%
separate(Race, into = c("Race", "Gender"))
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 3012 rows [1, 2,
## 3, 4, 7, 10, 13, 14, 15, 16, 19, 22, 25, 26, 27, 28, 31, 34, 37, 38, ...].
Participation=Participation%>%
filter(grepl("Women|Men", Race, ignore.case = T))%>%
mutate(
Gender=Race,
Race=NA_character_
)%>%
union(
Participation%>%
filter(!grepl("Women|Men", Race, ignore.case = T))
)
rm(Labor_force_participation)
#adjust for inflation to get to common 2019
Minimum_wage=Minimum_wage%>%
mutate(
`Min2019`=priceR::adjust_for_inflation(
`Federal minimum wage (real 2018 dollars)`,
2018,
"US",
2019
)
)
## Retrieving countries data
## Generating URL to request all 297 results
## Retrieving inflation data for US
## Generating URL to request all 61 results
Minimum_wage=Minimum_wage%>%
rename(MinCur=`Federal minimum wage (nominal dollars)`)%>%
select(Min2019, MinCur, Date)
g=Wages%>%
ggplot(aes(col=Race, x=Date))+
geom_line(aes(y=Average))+
geom_line(aes(y=Min2019, col=NULL), data=Minimum_wage, size=2)+
facet_wrap(~Gender)
ggplotly(g)
g=Wages%>%
ggplot(aes(col=Race, x=Date))+
geom_line(aes(y=Median))+
geom_line(aes(y=Min2019, col=NULL), data=Minimum_wage, size=2)+
facet_wrap(~Gender)
ggplotly(g)
g=Wages%>%
ggplot()+
geom_point(aes(x=Median, y=Average, col=Race, shape=Gender, frame=Date))+
ggtitle("Median vs Average Wage per Race and Gender over Time")
## Warning: Ignoring unknown aesthetics: frame
ggplotly(g)